# -*- coding: utf-8 -*-

from lxml import etree

# Flatten the cached XML responses into one plain-text file.
#
# Every <doc> element found in cache/response.0 .. cache/response.999 is
# written to processed/full as one record:
#
#     url
#     title
#     headline      (the literal text "None" when the element is absent)
#     passage text  (one line per <passage>, an empty line if it has no text)
#     end
#
# Each record is assembled as unicode and encoded to UTF-8 exactly once,
# right before it is written.  Mixing pre-encoded byte strings with
# unicode values in one '%' format (or writing unicode straight to a byte
# file) fails on Python 2 as soon as a field contains non-ASCII text.
#
# The `with` blocks close both files on every exit path, and - unlike a
# try/finally wrapped around the open call - never touch a file object
# that failed to open.
with open('processed/full', 'wb') as out:
    for i in range(1000):
        # Binary mode: the XML parser handles the encoding declaration itself.
        with open('cache/response.%s' % i, 'rb') as src:
            for _event, doc in etree.iterparse(src, tag='doc'):
                # <passages>, <url> and <title> are mandatory: next() raises
                # StopIteration if one of them is missing from the <doc>.
                passages = next(doc.iter('passages'))
                url = next(doc.iter('url')).text
                title = next(doc.iter('title')).text

                # <headline> is optional; None is rendered as "None" below.
                headline_node = next(doc.iter('headline'), None)
                headline = None if headline_node is None else headline_node.text

                # .text is None for an empty element; join() needs strings.
                body = u'\n'.join(
                    [p.text or u'' for p in passages.iter('passage')])

                record = u'%s\n%s\n%s\n%s\nend\n' % (url, title, headline, body)
                out.write(record.encode('utf-8'))
